#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;

namespace Irony.Compiler {

  //Scanner class. The Scanner's function is to transform a stream of characters into bigger aggregates/words or lexemes, 
  // like identifier, number, literal, etc. 

  /// <summary>
  /// Produces tokens from a source stream by trying the grammar's terminals at the current position.
  /// Call Prepare first to attach a compiler context and a source stream, then pull tokens
  /// with BeginScan (iterator pipeline) or GetNext (one token per call).
  /// </summary>
  public class Scanner  {
    /// <summary>Creates a scanner over the given grammar data (terminal lookup tables, recovery symbols, grammar).</summary>
    public Scanner(GrammarData data)  {
      _data = data;
    }

    #region Fields: _data, _source, _context, _caseSensitive, _currentToken
    GrammarData _data;
    ISourceStream  _source;      //assigned in Prepare; null until then
    CompilerContext  _context;   //assigned in Prepare; passed to terminals when matching
    bool _caseSensitive;         //copied from the grammar in Prepare; controls lookup-char lowering in SelectTerminals
    Token _currentToken;         //last token produced by BeginScan
    #endregion

    #region Events: TokenCreated
    //Note that scanner's output stream may not contain all tokens received by parser. Additional tokens
    // may be generated by intermediate token filters. To listen to token stream at parser input, 
    // use Parser's TokenReceived event. 
    public event EventHandler<TokenEventArgs> TokenCreated;
    //Single args instance reused for every notification; handlers should read Token during the call
    // and not hold on to the args object, because its Token is overwritten by the next notification.
    TokenEventArgs _tokenArgs = new TokenEventArgs(null);

    /// <summary>Raises TokenCreated for the given token; does nothing when there are no subscribers.</summary>
    protected void OnTokenCreated(Token token) {
      //Copy the delegate to a local so that a handler unsubscribing between the null check 
      // and the invocation cannot cause a NullReferenceException
      EventHandler<TokenEventArgs> handler = TokenCreated;
      if (handler == null) return;
      _tokenArgs.Token = token;
      handler(this, _tokenArgs);
    }
    #endregion

    /// <summary>
    /// Attaches the compiler context and the source stream to scan, reads the case-sensitivity
    /// setting from the context's grammar and rewinds the source to its beginning.
    /// </summary>
    public void Prepare(CompilerContext context, ISourceStream source) {
      _context = context;
      _caseSensitive = context.Compiler.Grammar.CaseSensitive;
      _source = source;
      ResetSource();
    }

    //Use this method in real compiler, in iterator-connected pipeline
    /// <summary>
    /// Lazily scans the prepared source. Raises TokenCreated for every token and yields it; 
    /// the sequence always ends with the Eof token.
    /// </summary>
    public IEnumerable<Token> BeginScan() {
      //We deliberately do not loop on "!_source.EOF()": ReadToken must be called once more at the end of input
      // to produce the Eof token. Otherwise an empty source, or a source that ends right after 
      // the last token (no trailing whitespace), would never yield Eof.
      while (true) {
        _currentToken = ReadToken();
        OnTokenCreated(_currentToken);
        yield return _currentToken;
        if (_currentToken.Terminal == Grammar.Eof)
          yield break;
      }//while
    }// method

    //Use this method for VS integration; VS language package requires scanner that returns tokens one-by-one. 
    // Start and End positions required by this scanner may be derived from Token : 
    //   start=token.Location.Position; end=start + token.Length;
    // state is not used now - maybe in the future
    // Note: unlike BeginScan, this method does not raise TokenCreated and does not update _currentToken.
    public Token GetNext(ref int state) {
      return ReadToken();
    }

    // Reads one token starting at the current source position.
    // Returns the Eof token at end of input, the best matching normal token if any terminal (or the grammar) 
    // produces one, and otherwise an error token - after skipping ahead with Recover().
    private Token ReadToken() {
      SkipWhitespace();
      SetTokenStartLocation();
      //Check for EOF
      if (_source.EOF()) 
        return new Token(Grammar.Eof, _source.TokenStart, string.Empty, Grammar.Eof.Name);
      //Remember the first char of the token now. The matching loop below resets the position before every attempt,
      // which suggests a terminal may leave the position anywhere after a failed match; 
      // so _source.CurrentChar cannot be trusted to still be this char when the error message is built.
      char firstChar = _source.CurrentChar;
      //Find matching terminal
      TerminalList terms = SelectTerminals(firstChar);
      Token result = null;  //normal successful token
      Token errorResult = null;  //error token returned by some terminal - used when other terminals returned nothing
      foreach (Terminal term in terms) {
        // Check if the term has lower priority that result token we already have; 
        //  if term.Priority is lower then we don't need to check anymore, higher priority wins
        // Note that terminals in the list are sorted in descending priority order
        if (result != null && result.Terminal.Priority > term.Priority)
          break; 
        //Reset source position and try to match
        _source.Position = _source.TokenStart.Position;
        Token token = term.TryMatch(_context, _source);
        if (token == null) 
          continue;
        if (token.Terminal.Category == TokenCategory.Error) {
          errorResult = token; //remember error token to return it if other terminals cannot construct anything
          continue;
        }
        //Take this token as result only if we don't have anything yet, or if it is longer token than previous
        // (among terminals of equal priority the longest match wins; on equal length the earlier terminal is kept)
        if (result == null || token.Length > result.Length) 
          result = token;
      }
      //If we don't have a token, try Grammar's method
      // NOTE(review): the position is not reset to the token start before this call, so Grammar.TryMatch 
      //  sees whatever position the last terminal left - confirm this is intended.
      if (result == null) {
        Token token = _data.Grammar.TryMatch(_context, _source);
        if (token != null) {
          if (token.IsError())
            errorResult = token;
          else
            result = token;
        }
      }//if result == null
      //If we have normal token then return it
      if (result != null) {
        //restore position to point after the result token
        _source.Position = _source.TokenStart.Position + result.Length; 
        return result;
      } 
      //we have an error: either errorResult token or no token at all
      if (errorResult == null) //if no error  result then create it
        errorResult = Grammar.CreateSyntaxErrorToken(_source.TokenStart, "Invalid character: '{0}'", firstChar);
      Recover();
      return errorResult;
    }//method

    /// <summary>
    /// Returns the terminals registered for the given first character (lower-cased first when the grammar 
    /// is case-insensitive), or the list of terminals without prefixes when the character has no entry.
    /// </summary>
    public TerminalList SelectTerminals(char current) {
      TerminalList result;
      if (!_caseSensitive)
        current = char.ToLower(current);
      if (_data.TerminalsLookup.TryGetValue(current, out result))
        return result;
      else
        return _data.TerminalsWithoutPrefixes;
    }//Select

    // Error recovery: moves the source position forward to the next scanner recovery symbol (or to EOF).
    private void Recover() {
      //Guarantee progress. If the position is still at the start of the failed token, step over its first char; 
      // otherwise an offending char that is itself a recovery symbol would leave the position unchanged 
      // and the very same error token would be produced on every following call.
      if (!_source.EOF() && _source.Position <= _source.TokenStart.Position)
        _source.Position = _source.TokenStart.Position + 1;
      while (!_source.EOF() && _data.ScannerRecoverySymbols.IndexOf(_source.CurrentChar) < 0)
        _source.Position++;
    }

    /// <summary>
    /// Returns the source stream's string representation (per the original author's note, a short preview 
    /// starting from the current position); falls back to the default when no source is attached yet.
    /// </summary>
    public override string ToString() {
      //ToString may be called (e.g. by a debugger) before Prepare assigned the source
      if (_source == null) 
        return base.ToString();
      return _source.ToString(); 
    }

    #region TokenStart calculations
    private int _nextNewLinePosition = -1; //private field to cache position of next \n character

    /// <summary>Rewinds the source to position 0, resets TokenStart and re-caches the position of the first newline.</summary>
    public void ResetSource() {
      _source.Position = 0;
      _source.TokenStart = new SourceLocation();
      _nextNewLinePosition = _source.Text.IndexOf('\n');
    }

    // Advances the source position past all chars listed in the grammar's WhitespaceChars.
    private void SkipWhitespace() {
      //skip whitespace
      string wspace = _data.Grammar.WhitespaceChars;
      while (!_source.EOF() && wspace.IndexOf(_source.CurrentChar) >= 0)
        _source.Position++;
    }

    //This is all about source scanning optimization - this seemingly strange code is aimed at improving performance,
    // so keep this in mind when reading it. 
    // Updates _source.TokenStart (position/line/column) to describe the current source position.
    // Works incrementally from the previous TokenStart and the cached position of the next newline, 
    // so the text is re-scanned only when the new token starts on a later line.
    internal void SetTokenStartLocation() {
      //cache values in local variables
      SourceLocation tokenStart = _source.TokenStart;
      int newPosition = _source.Position;
      string text = _source.Text;

      // Currently TokenStart field contains location (pos/line/col) of the last created token. 
      // First, check if new position is in the same line; if so, just adjust column and return
      //  Note that this case is not line start, so we do not need to check tab chars (and adjust column) 
      // (_nextNewLinePosition < 0 means there are no more newlines, so everything that follows is on the same line)
      if (newPosition <= _nextNewLinePosition || _nextNewLinePosition < 0) {
        tokenStart.Column += newPosition - tokenStart.Position;
        tokenStart.Position = newPosition;
        _source.TokenStart = tokenStart;
        return;
      }
      //So new position is on new line (beyond _nextNewLinePosition)
      //First count \n chars in the string fragment
      int lineStart = _nextNewLinePosition;
      int nlCount = 1; //we start after old _nextNewLinePosition, so we count one NewLine char
      ScanTextForChar(text, '\n', lineStart + 1, newPosition - 1, ref nlCount, ref lineStart);
      tokenStart.Line += nlCount;
      //at this moment lineStart is the position of the last \n before newPosition, 
      // i.e. the line where newPosition is located begins at lineStart + 1 
      //Calc # of tab chars from lineStart to newPosition to adjust column#
      int tabCount = 0;
      int dummy = 0; //ScanTextForChar also reports the last match position; not needed for tabs
      if (_source.TabWidth > 1)
        ScanTextForChar(text, '\t', lineStart, newPosition - 1, ref tabCount, ref dummy);

      //adjust TokenStart with calculated information
      tokenStart.Position = newPosition;
      tokenStart.Column = newPosition - lineStart - 1; //number of chars between the newline and the token start
      if (tabCount > 0)
        tokenStart.Column += (_source.TabWidth - 1) * tabCount; // "-1" to count for tab char itself

      //finally cache new line and assign TokenStart
      _nextNewLinePosition = text.IndexOf('\n', newPosition);
      _source.TokenStart = tokenStart;
    }

    // Finds all occurrences of ch in text within the inclusive index range [from, until].
    // Adds the number of occurrences to count and stores the index of the last occurrence in lastPosition; 
    // both are left untouched when the range is empty or contains no match.
    private static void ScanTextForChar(string text, char ch, int from, int until, ref int count, ref int lastPosition) {
      if (from > until) return;
      while (true) {
        //after the last match "from" may be until + 1; the search length is then 0 and IndexOf returns -1
        int next = text.IndexOf(ch, from, until - from + 1);
        if (next < 0) return;
        lastPosition = next;
        count++;
        from = next + 1;
      }

    }
    #endregion


  }//class

}//namespace
